Besides checking the results of DE analysis, also save the sets of DE genes out, to prepare for next step of gene set enrichment analysis.

R libraries.

library(ggplot2)
library(ggrepel)
library(ggpubr)
library(stringr)
library(MASS)
library(RColorBrewer)
library(DESeq2)

library(viridis)
library(ggpointdensity)
library(dplyr)
library(data.table)
library(ComplexHeatmap)
library(UpSetR)
library(readxl)
theme_set(theme_classic())

#path = "../../data/MorPhiC/June-2024/JAX_RNAseq_Neuroectoderm/"
path = "../../MorPhiC/data/June-2024/JAX_RNAseq_Neuroectoderm/"
dat_path = paste0(path, "processed/release110-gencode44/Tables")

Read in meta data

From searching online, my guess is the model_organ content inside the bracket should be

dorsal forebrain patterning

instead of

dorsal forebrain pattering

But leave it as it is for now.

meta = fread("data/June_2024/JAX_RNAseq_Neuroectoderm_meta_data.tsv", 
             data.table = FALSE)
dim(meta)
## [1] 174  32
meta[1:2,]
##   biomaterial_id lib_biomaterial_id differentiated_biomaterial_id
## 1    MOK20002-WT         GT23-05635          CBO-MOK20002-WT-Day7
## 2    MOK20002-WT         GT23-05625          CBO-MOK20002-WT-Day6
##    differentiated_biomaterial_description differentiation_protocol
## 1 KOLF2.2 derived cortical brain organoid                 JAXPD003
## 2 KOLF2.2 derived cortical brain organoid                 JAXPD003
##   differentiated_timepoint_value differentiated_timepoint_unit
## 1                              7                          days
## 2                              6                          days
##   differentiated_terminally_differentiated
## 1                                       No
## 2                                       No
##                                   model_organ ko_strategy ko_gene
## 1 Cortical brain (dorsal forebrain pattering)          WT      WT
## 2 Cortical brain (dorsal forebrain pattering)          WT      WT
##   library_preparation_protocol dissociation_protocol fragment_size input_amount
## 1                     JAXPL001                   N.A           410          300
## 2                     JAXPL001                   N.A           425          300
##   input_unit final_yield final_yield_unit lib_concentration
## 1        ngs       249.6              ngs              51.8
## 2        ngs       170.0              ngs              33.8
##   lib_concentration_unit PCR_cycle PCR_cycle_sample_index
## 1                     nM        10                     NA
## 2                     nM        10                     NA
##                                             file_id sequencing_protocol
## 1 RFX_WT_Day7_GT23-05635_GCACGGAC-TGCGAGAC_S55_L002            JAXPS001
## 2 RFX_WT_Day6_GT23-05625_GACCTGAA-CTCACCAA_S97_L002            JAXPS001
##                       run_id biomaterial_description
## 1 20230424_23-scbct-028-run3                 KOLF2.2
## 2 20230424_23-scbct-028-run3                 KOLF2.2
##   derived_cell_line_accession clone_id protocol_id zygosity type model_system
## 1                    KOLF2.2J       WT         N.A      N.A iPSC          CBO
## 2                    KOLF2.2J       WT         N.A      N.A iPSC          CBO
names(meta)
##  [1] "biomaterial_id"                          
##  [2] "lib_biomaterial_id"                      
##  [3] "differentiated_biomaterial_id"           
##  [4] "differentiated_biomaterial_description"  
##  [5] "differentiation_protocol"                
##  [6] "differentiated_timepoint_value"          
##  [7] "differentiated_timepoint_unit"           
##  [8] "differentiated_terminally_differentiated"
##  [9] "model_organ"                             
## [10] "ko_strategy"                             
## [11] "ko_gene"                                 
## [12] "library_preparation_protocol"            
## [13] "dissociation_protocol"                   
## [14] "fragment_size"                           
## [15] "input_amount"                            
## [16] "input_unit"                              
## [17] "final_yield"                             
## [18] "final_yield_unit"                        
## [19] "lib_concentration"                       
## [20] "lib_concentration_unit"                  
## [21] "PCR_cycle"                               
## [22] "PCR_cycle_sample_index"                  
## [23] "file_id"                                 
## [24] "sequencing_protocol"                     
## [25] "run_id"                                  
## [26] "biomaterial_description"                 
## [27] "derived_cell_line_accession"             
## [28] "clone_id"                                
## [29] "protocol_id"                             
## [30] "zygosity"                                
## [31] "type"                                    
## [32] "model_system"
table(meta$model_organ, meta$ko_gene)
##                                              
##                                               FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
##   Cortical brain (dorsal forebrain pattering)    36   36    9   27    9   36 21
table(meta$run_id, meta$ko_gene)
##                             
##                              FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
##   20230228_23-scbct-008          0    0    0    0    9    0  1
##   20230424_23-scbct-028-run3     0    0    0    0    0   27  3
##   20230508_23-scbct-039-run2    27    0    0    0    0    0  3
##   20230522_23-scbct-052          0   27    0    0    0    0  3
##   20230824_23-scbct-092          0    0    9    0    0    0  1
##   20240131_23-robson-020         0    0    0   27    0    0  3
##   20240307_24-robson-003         0    0    0    0    0    9  1
##   20240307_24-robson-004         0    9    0    0    0    0  3
##   20240307_24-robson-006         9    0    0    0    0    0  3
table(meta$ko_strategy, meta$ko_gene)
##      
##       FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
##   CE     12   12    3    9    3   12  0
##   KO     12   12    3    9    3   12  0
##   PTC    12   12    3    9    3   12  0
##   WT      0    0    0    0    0    0 21

Manually correct one mistake in the metadata

For the row with column differentiated_biomaterial_id==“CBO-MOK20004-WT-Day3”, it should have value 3 instead of the original 4 in the column differentiated_timepoint_value.

print("Before correcting the error in timepoint:")
## [1] "Before correcting the error in timepoint:"
print("Table between ko_gene and differentiated_timepoint_value:")
## [1] "Table between ko_gene and differentiated_timepoint_value:"
table(meta$ko_gene, meta$differentiated_timepoint_value)
##        
##         3 4 5 6 7 8 9 14
##   FEZF2 0 0 9 9 9 9 0  0
##   LHX5  9 9 9 9 0 0 0  0
##   LHX9  0 0 0 0 0 0 0  9
##   OTX1  0 0 0 0 9 9 9  0
##   PAX6  0 0 0 0 9 0 0  0
##   RFX4  0 0 0 9 9 9 9  0
##   WT    0 2 2 5 4 5 2  1
meta[which(meta$differentiated_biomaterial_id=="CBO-MOK20004-WT-Day3"), 
     "differentiated_timepoint_value"] = 3

print("After correcting the error in timepoint:")
## [1] "After correcting the error in timepoint:"
print("Table between ko_gene and differentiated_timepoint_value:")
## [1] "Table between ko_gene and differentiated_timepoint_value:"
table(meta$ko_gene, meta$differentiated_timepoint_value)
##        
##         3 4 5 6 7 8 9 14
##   FEZF2 0 0 9 9 9 9 0  0
##   LHX5  9 9 9 9 0 0 0  0
##   LHX9  0 0 0 0 0 0 0  9
##   OTX1  0 0 0 0 9 9 9  0
##   PAX6  0 0 0 0 9 0 0  0
##   RFX4  0 0 0 9 9 9 9  0
##   WT    1 1 2 5 4 5 2  1

Read in gene count data and filter genes

cts = fread(file.path(dat_path, "genesCounts.csv"), data.table = FALSE)
dim(cts)
## [1] 38592   175
cts[1:2, c(1:2, (ncol(cts)-1):ncol(cts))]
##              Name RFX_CE_F5_Day8_GT23-05641_GTCGGAGC-TTATAACC_S73_L002
## 1 ENSG00000268674                                                    0
## 2 ENSG00000271254                                                 1521
##   LHX5_CE_A1_Day3_GT23-07300_AAGTCCAA-TACTCATA_S163_L004
## 1                                                      0
## 2                                                   1270
##   LHX5_KO_E4_Day5_GT23-07318_AACGTTCC-AGTACTCC_S166_L004
## 1                                                      0
## 2                                                    746
stopifnot(setequal(names(cts)[-1], meta$file_id))

meta = meta[match(names(cts)[-1], meta$file_id),]
table(names(cts)[-1] == meta$file_id)
## 
## TRUE 
##  174
cts_dat = data.matrix(cts[,-1])
rownames(cts_dat) = cts$Name

Discard genes whose expression is 0 in more than 75% of samples

model_s = meta$model_system
table(model_s, useNA="ifany")
## model_s
## CBO 
## 174
table(meta$model_system, meta$model_organ, useNA="ifany")
##      
##       Cortical brain (dorsal forebrain pattering)
##   CBO                                         174
get_q75 <- function(x){quantile(x, probs = 0.75)}

# given that there is only one level for model_system
# the result from apply(cts_dat, 1, tapply, model_s, min) is no longer a matrix
# but a vector
# do not do transpose to the results from apply(cts_dat, 1, tapply, model_s, min)
gene_info = data.frame(Name = cts$Name, 
                       apply(cts_dat, 1, tapply, model_s, min), 
                       apply(cts_dat, 1, tapply, model_s, median), 
                       apply(cts_dat, 1, tapply, model_s, get_q75), 
                       apply(cts_dat, 1, tapply, model_s, max))

dim(gene_info)
## [1] 38592     5
gene_info[1:2, ]
##                            Name apply.cts_dat..1..tapply..model_s..min.
## ENSG00000268674 ENSG00000268674                                       0
## ENSG00000271254 ENSG00000271254                                     426
##                 apply.cts_dat..1..tapply..model_s..median.
## ENSG00000268674                                          0
## ENSG00000271254                                       1318
##                 apply.cts_dat..1..tapply..model_s..get_q75.
## ENSG00000268674                                        0.00
## ENSG00000271254                                     1701.75
##                 apply.cts_dat..1..tapply..model_s..max.
## ENSG00000268674                                       3
## ENSG00000271254                                    3117
table(row.names(gene_info)==gene_info$Name)
## 
##  TRUE 
## 38592
names(gene_info)[2:5] = paste0(rep(c("CBO_"), times=4), 
                               rep(c("min", "median", "q75", "max"), each=1))
dim(gene_info)
## [1] 38592     5
gene_info[1:2,]
##                            Name CBO_min CBO_median CBO_q75 CBO_max
## ENSG00000268674 ENSG00000268674       0          0    0.00       3
## ENSG00000271254 ENSG00000271254     426       1318 1701.75    3117
summary(gene_info[,-1])
##     CBO_min          CBO_median          CBO_q75            CBO_max       
##  Min.   :    0.0   Min.   :     0.0   Min.   :     0.0   Min.   :      0  
##  1st Qu.:    0.0   1st Qu.:     0.0   1st Qu.:     0.0   1st Qu.:      2  
##  Median :    0.0   Median :     3.0   Median :     5.8   Median :     22  
##  Mean   :  213.8   Mean   :   699.1   Mean   :   931.4   Mean   :   1829  
##  3rd Qu.:   89.0   3rd Qu.:   332.1   3rd Qu.:   474.6   3rd Qu.:   1003  
##  Max.   :88969.0   Max.   :489181.5   Max.   :586205.0   Max.   :1301990
table(gene_info$CBO_q75 > 0)
## 
## FALSE  TRUE 
## 14390 24202
w2kp = which(gene_info$CBO_q75 > 0)
cts_dat = cts_dat[w2kp,]
dim(cts_dat)
## [1] 24202   174
gene_info = gene_info[w2kp,]

Read in gene annotation

gene_anno = fread("data/gencode_v44_primary_assembly_info.tsv")
dim(gene_anno)
## [1] 62754     8
gene_anno[1:2,]
##                geneId    chr strand     start       end ensembl_gene_id
##                <char> <char> <char>     <int>     <int>          <char>
## 1: ENSG00000000003.16   chrX      - 100627108 100639991 ENSG00000000003
## 2:  ENSG00000000005.6   chrX      + 100584936 100599885 ENSG00000000005
##    hgnc_symbol                                       description
##         <char>                                            <char>
## 1:      TSPAN6 tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858]
## 2:        TNMD   tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757]
# all genes are contained in the annotation file 
table(gene_info$Name %in% gene_anno$ensembl_gene_id)
## 
##  TRUE 
## 24202
setdiff(gene_info$Name, gene_anno$ensembl_gene_id)
## character(0)
gene_info = merge(gene_anno, gene_info, by.x="ensembl_gene_id", by.y = "Name", 
                  all.x = FALSE, all.y = TRUE)
dim(gene_info)
## [1] 24202    12
gene_info[1:2,]
## Key: <ensembl_gene_id>
##    ensembl_gene_id             geneId    chr strand     start       end
##             <char>             <char> <char> <char>     <int>     <int>
## 1: ENSG00000000003 ENSG00000000003.16   chrX      - 100627108 100639991
## 2: ENSG00000000005  ENSG00000000005.6   chrX      + 100584936 100599885
##    hgnc_symbol                                       description CBO_min
##         <char>                                            <char>   <int>
## 1:      TSPAN6 tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858]     744
## 2:        TNMD   tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757]       0
##    CBO_median CBO_q75 CBO_max
##         <num>   <num>   <int>
## 1:     2644.0 3235.00    5449
## 2:        9.5   20.75      97
gene_info[which(!gene_info$ensembl_gene_id%in%gene_anno$ensembl_gene_id)]
## Key: <ensembl_gene_id>
## Empty data.table (0 rows and 12 cols): ensembl_gene_id,geneId,chr,strand,start,end...
gene_info[gene_info$CBO_min > 2e4, c("CBO_min", "hgnc_symbol")]
##    CBO_min hgnc_symbol
##      <int>      <char>
## 1:   22057        ACTB
## 2:   23713     HNRNPA1
## 3:   66180      EEF1A1
## 4:   23858        TUBB
## 5:   88639      MT-CO1
## 6:   29947      MT-ND4
## 7:   39231      MT-CO3
## 8:   88969      MALAT1
gene_info$gene_symbol = gene_info$hgnc_symbol

table(gene_info$gene_symbol == "")
## 
## FALSE  TRUE 
## 19133  5069
table(is.na(gene_info$gene_symbol))
## 
## FALSE 
## 24202
wEns = which(gene_info$gene_symbol == "" | is.na(gene_info$gene_symbol))
gene_info$gene_symbol[wEns] = gene_info$ensembl_gene_id[wEns]

table(gene_info$gene_symbol == "")
## 
## FALSE 
## 24202
table(is.na(gene_info$gene_symbol))
## 
## FALSE 
## 24202

Prepare for reading in DE results

release_name = "2024_06_JAX_RNAseq_Neuroectoderm"
result_dir = file.path("results", release_name)
processed_output_dir = file.path(result_dir, "processed")

all_result_files = list.files(path=processed_output_dir, pattern=".tsv", 
                              all.files=TRUE, full.names=FALSE)
all_result_files = sort(all_result_files)

extract_cores <- function(x){
  x_split = str_split(x, "_")[[1]]
  x_start = 6
  x_end = length(x_split)-1
  x_d_group = paste(x_split[x_start:(x_end-1)], collapse="_")
  x_gene = x_split[x_end]
  return(c(x_d_group, x_gene))
}

df_file_info = NULL

for (x in all_result_files){
  df_file_info = rbind(df_file_info, extract_cores(x))
}

df_file_info = as.data.frame(df_file_info)
colnames(df_file_info) = c("DE_group", "gene")
df_file_info
##       DE_group  gene
## 1  CBO_day_3_4  LHX5
## 2    CBO_day_5 FEZF2
## 3    CBO_day_5  LHX5
## 4    CBO_day_6 FEZF2
## 5    CBO_day_6  LHX5
## 6    CBO_day_6  RFX4
## 7    CBO_day_7 FEZF2
## 8    CBO_day_7  OTX1
## 9    CBO_day_7  PAX6
## 10   CBO_day_7  RFX4
## 11   CBO_day_8 FEZF2
## 12   CBO_day_8  OTX1
## 13   CBO_day_8  RFX4
## 14   CBO_day_9  OTX1
## 15   CBO_day_9  RFX4

Compare the number of DE genes (different ko strategies)

For this dataset, there is only one model system, and for DE group (day) and each knockout gene, there are three knockout strategies.

Do it separately for each DE group (day).

df_file_info$items = paste(df_file_info$DE_group, 
                           df_file_info$gene, sep="_")
df_file_info$items
##  [1] "CBO_day_3_4_LHX5" "CBO_day_5_FEZF2"  "CBO_day_5_LHX5"   "CBO_day_6_FEZF2" 
##  [5] "CBO_day_6_LHX5"   "CBO_day_6_RFX4"   "CBO_day_7_FEZF2"  "CBO_day_7_OTX1"  
##  [9] "CBO_day_7_PAX6"   "CBO_day_7_RFX4"   "CBO_day_8_FEZF2"  "CBO_day_8_OTX1"  
## [13] "CBO_day_8_RFX4"   "CBO_day_9_OTX1"   "CBO_day_9_RFX4"
fc0 = log2(1.5)
p0  = 0.05

gene_symbols = df_file_info$gene
gene_symbols
##  [1] "LHX5"  "FEZF2" "LHX5"  "FEZF2" "LHX5"  "RFX4"  "FEZF2" "OTX1"  "PAX6" 
## [10] "RFX4"  "FEZF2" "OTX1"  "RFX4"  "OTX1"  "RFX4"
table(gene_symbols %in% gene_info$gene_symbol)
## 
## TRUE 
##   15
genes_w_multi_ensembl = names(which(table(gene_info$gene_symbol)>1))
genes_w_multi_ensembl
## [1] "LINC01238"
sorted_de_grps = sort(unique(df_file_info$DE_group))
  
for (d_group in sorted_de_grps){
  
  df_file_info_dg = df_file_info[which(df_file_info$DE_group==d_group), ]
  
  df = data.frame(KO = df_file_info_dg$items, 
                  gene_symbol = df_file_info_dg$gene, 
                  CE_nDE = NA, KO_nDE = NA, PTC_nDE = NA, 
                  CE_padj = NA, CE_log2FoldChange = NA, 
                  KO_padj = NA, KO_log2FoldChange = NA, 
                  PTC_padj = NA, PTC_log2FoldChange = NA)

  df$Model = str_extract(df$KO, "^[a-zA-Z0-9]+")
  
  gene_ids = gene_info$ensembl_gene_id[match(df_file_info_dg$gene, gene_info$gene_symbol)]
    
  for (k in 1:nrow(df_file_info_dg)){
      
    res = fread(file.path(processed_output_dir,
                          paste0(release_name, "_", df_file_info_dg$items[k], "_DEseq2.tsv")),
                          data.table = FALSE)
    print(res[1:2,1:15])
    
    gene_id = gene_ids[k]
    w_gene = which(res$gene_ID == gene_id)
    stopifnot(length(w_gene) == 1)
    stopifnot(!(df$gene_symbol[k]%in%genes_w_multi_ensembl))
    
    for(ko1 in c("CE", "KO", "PTC")){
      
      fc = paste0(df$gene_symbol[k], "_", ko1, "_log2FoldChange")
      padj = paste0(df$gene_symbol[k], "_", ko1, "_padj")
      col_name = paste0(ko1, "_nDE")
      df[k,col_name] = sum(abs(res[[fc]]) > fc0 & res[[padj]] < p0, na.rm = TRUE)
      
      col_name = paste0(ko1, "_log2FoldChange")
      df[k,col_name] = res[[fc]][w_gene]
      
      col_name = paste0(ko1, "_padj")
      df[k,col_name] = res[[padj]][w_gene]
      
    }
    
  }
  
  print(df)

  p1 <- ggplot(df, aes(x=(PTC_nDE), y=(CE_nDE), 
                 label=gene_symbol)) + 
        geom_point() + scale_shape_manual(values = c(7, 16)) + 
        geom_text_repel() + ggtitle("# of DE genes") + 
        labs(x="PTC", y="CE") + 
        geom_abline(intercept = 0, slope = 1, color = "orange", linetype = "dashed")

  p2 <- ggplot(df, aes(x=(PTC_nDE), y=(KO_nDE),  
                 label=gene_symbol)) + 
        geom_point() + scale_shape_manual(values = c(7, 16)) + 
        geom_text_repel() + ggtitle("# of DE genes") + 
        labs(x="PTC", y="KO") + 
        geom_abline(intercept = 0, slope = 1, color = "orange", linetype = "dashed")
  
  p3 <- ggplot(df, aes(x=CE_log2FoldChange, y=-log10(CE_padj), 
                 label=gene_symbol)) + 
        geom_point(size=2) + scale_shape_manual(values = c(7, 16)) + 
        geom_text_repel() + ggtitle("DE results of the KO genes, CE") + 
        labs(x="log2 fold change", y="-log10(adjusted p-value)")  + 
        geom_hline(yintercept = 2, color = "grey", linetype = "dashed") + 
        geom_vline(xintercept = c(log2(1/1.5), log2(1.5)), color = "grey",
                   linetype = "dashed")
  
  p4 <- ggplot(df, aes(x=KO_log2FoldChange, y=-log10(KO_padj), 
                 label=gene_symbol)) + 
        geom_point(size=2) + scale_shape_manual(values = c(7, 16)) + 
        geom_text_repel() + ggtitle("DE results of the KO genes, KO") + 
        labs(x="log2 fold change", y="-log10(adjusted p-value)")  + 
        geom_hline(yintercept = 2, color = "grey", linetype = "dashed") + 
        geom_vline(xintercept = c(log2(1/1.5), log2(1.5)), color = "grey",
                   linetype = "dashed")
  
  p5 <- ggplot(df, aes(x=PTC_log2FoldChange, y=-log10(PTC_padj), 
                 label=gene_symbol)) + 
        geom_point(size=2) + scale_shape_manual(values = c(7, 16)) + 
        geom_text_repel() + ggtitle("DE results of the KO genes, PTC") + 
        labs(x="log2 fold change", y="-log10(adjusted p-value)")  + 
        geom_hline(yintercept = 2, color = "grey", linetype = "dashed") + 
        geom_vline(xintercept = c(log2(1/1.5), log2(1.5)), color = "grey",
                   linetype = "dashed")

 g_combined <- ggarrange(p1, p2, p3, p4, p5, ncol = 2, nrow = 3)
 print(annotate_figure(g_combined,
                      top = text_grob(d_group, 
                                      face = "bold", size = 16)))
  
  
}
##           gene_ID hgnc_symbol        chr strand start   end
## 1 ENSG00000271254             KI270711.1      -  4612 29626
## 2 ENSG00000276345             KI270721.1      +  2585 11802
##   LHX5_CE_log2FoldChange LHX5_CE_pvalue LHX5_CE_padj LHX5_KO_log2FoldChange
## 1                 0.0376          0.707        0.957                  0.025
## 2                 1.4400          0.628           NA                  0.106
##   LHX5_KO_pvalue LHX5_KO_padj LHX5_PTC_log2FoldChange LHX5_PTC_pvalue
## 1          0.802            1                  0.0541           0.588
## 2          0.972           NA                 -0.3200           0.915
##   LHX5_PTC_padj
## 1         0.974
## 2         0.996
##                 KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_3_4_LHX5        LHX5     10      7      11   0.928             0.455
##   KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1       1             -1.12    0.855              -2.07   CBO
##           gene_ID hgnc_symbol        chr strand start   end
## 1 ENSG00000271254             KI270711.1      -  4612 29626
## 2 ENSG00000276345             KI270721.1      +  2585 11802
##   FEZF2_CE_log2FoldChange FEZF2_CE_pvalue FEZF2_CE_padj FEZF2_KO_log2FoldChange
## 1                   0.178           0.127         0.393                   0.167
## 2                  -1.600           0.597            NA                  -1.490
##   FEZF2_KO_pvalue FEZF2_KO_padj FEZF2_PTC_log2FoldChange FEZF2_PTC_pvalue
## 1           0.151         0.654                    0.149            0.199
## 2           0.619            NA                    0.558            0.846
##   FEZF2_PTC_padj
## 1          0.585
## 2             NA
##           gene_ID hgnc_symbol        chr strand start   end
## 1 ENSG00000271254             KI270711.1      -  4612 29626
## 2 ENSG00000276345             KI270721.1      +  2585 11802
##   LHX5_CE_log2FoldChange LHX5_CE_pvalue LHX5_CE_padj LHX5_KO_log2FoldChange
## 1                  0.216         0.0788        0.698                  0.225
## 2                  2.600         0.4470           NA                  3.900
##   LHX5_KO_pvalue LHX5_KO_padj LHX5_PTC_log2FoldChange LHX5_PTC_pvalue
## 1         0.0673        0.276                   0.210          0.0888
## 2         0.2470           NA                  -0.257          0.9420
##   LHX5_PTC_padj
## 1             1
## 2            NA
##                KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_5_FEZF2       FEZF2    182      9      22      NA            -4.640
## 2  CBO_day_5_LHX5        LHX5      2    172       1    0.96             0.101
##   KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1      NA           -5.6900       NA              -1.05   CBO
## 2   0.978           -0.0183   0.0949              -1.06   CBO
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).

##           gene_ID hgnc_symbol        chr strand start   end
## 1 ENSG00000271254             KI270711.1      -  4612 29626
## 2 ENSG00000276345             KI270721.1      +  2585 11802
##   FEZF2_CE_log2FoldChange FEZF2_CE_pvalue FEZF2_CE_padj FEZF2_KO_log2FoldChange
## 1                   0.209           0.283         0.682                    0.11
## 2                  -3.020           0.453            NA                  -17.60
##   FEZF2_KO_pvalue FEZF2_KO_padj FEZF2_PTC_log2FoldChange FEZF2_PTC_pvalue
## 1        5.71e-01             1                    0.188            0.334
## 2        1.56e-05            NA                   -0.995            0.800
##   FEZF2_PTC_padj
## 1              1
## 2             NA
##           gene_ID hgnc_symbol        chr strand start   end
## 1 ENSG00000271254             KI270711.1      -  4612 29626
## 2 ENSG00000276345             KI270721.1      +  2585 11802
##   LHX5_CE_log2FoldChange LHX5_CE_pvalue LHX5_CE_padj LHX5_KO_log2FoldChange
## 1                  0.244         0.0915        0.902                 0.0231
## 2                  6.470         0.0439           NA                 7.7500
##   LHX5_KO_pvalue LHX5_KO_padj LHX5_PTC_log2FoldChange LHX5_PTC_pvalue
## 1         0.8890            1                 0.00553          0.9700
## 2         0.0312           NA                 7.28000          0.0225
##   LHX5_PTC_padj
## 1             1
## 2            NA
##           gene_ID hgnc_symbol        chr strand start   end
## 1 ENSG00000271254             KI270711.1      -  4612 29626
## 2 ENSG00000276345             KI270721.1      +  2585 11802
##   RFX4_CE_log2FoldChange RFX4_CE_pvalue RFX4_CE_padj RFX4_KO_log2FoldChange
## 1                 -0.127          0.522            1                -0.0382
## 2                  6.280          0.157           NA               -12.7000
##   RFX4_KO_pvalue RFX4_KO_padj RFX4_PTC_log2FoldChange RFX4_PTC_pvalue
## 1        0.84500        0.977                  -0.127           0.519
## 2        0.00486           NA                   5.210           0.240
##   RFX4_PTC_padj
## 1             1
## 2            NA
##                KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_6_FEZF2       FEZF2     37      6       9      NA            -2.120
## 2  CBO_day_6_LHX5        LHX5     72      1       3   0.551            -1.290
## 3  CBO_day_6_RFX4        RFX4      6     12      10   1.000            -0.625
##   KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1  0.0023            -6.270        1            -0.0381   CBO
## 2  1.0000            -0.423        1            -0.3610   CBO
## 3  0.9570             0.569        1            -0.8490   CBO
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).

##           gene_ID hgnc_symbol        chr strand  start    end
## 1 ENSG00000271254             KI270711.1      -   4612  29626
## 2 ENSG00000277196             KI270734.1      - 138082 161852
##   FEZF2_CE_log2FoldChange FEZF2_CE_pvalue FEZF2_CE_padj FEZF2_KO_log2FoldChange
## 1                 -0.1590           0.372         0.686                 -0.0756
## 2                 -0.0632           0.822         0.935                  0.1980
##   FEZF2_KO_pvalue FEZF2_KO_padj FEZF2_PTC_log2FoldChange FEZF2_PTC_pvalue
## 1           0.672             1                  -0.0741            0.678
## 2           0.482             1                   0.3160            0.261
##   FEZF2_PTC_padj
## 1              1
## 2              1
##           gene_ID hgnc_symbol        chr strand  start    end
## 1 ENSG00000271254             KI270711.1      -   4612  29626
## 2 ENSG00000277196             KI270734.1      - 138082 161852
##   OTX1_CE_log2FoldChange OTX1_CE_pvalue OTX1_CE_padj OTX1_KO_log2FoldChange
## 1                  0.434         0.0177            1                 0.2230
## 2                 -0.321         0.2610            1                -0.0552
##   OTX1_KO_pvalue OTX1_KO_padj OTX1_PTC_log2FoldChange OTX1_PTC_pvalue
## 1          0.224            1                  0.0815           0.658
## 2          0.846            1                 -0.1100           0.698
##   OTX1_PTC_padj
## 1             1
## 2             1
##           gene_ID hgnc_symbol        chr strand  start    end
## 1 ENSG00000271254             KI270711.1      -   4612  29626
## 2 ENSG00000277196             KI270734.1      - 138082 161852
##   PAX6_CE_log2FoldChange PAX6_CE_pvalue PAX6_CE_padj PAX6_KO_log2FoldChange
## 1                 0.0158          0.929            1                 0.0621
## 2                 0.0929          0.741            1                -0.0896
##   PAX6_KO_pvalue PAX6_KO_padj PAX6_PTC_log2FoldChange PAX6_PTC_pvalue
## 1          0.726            1                  -0.121           0.493
## 2          0.750            1                   0.290           0.302
##   PAX6_PTC_padj
## 1             1
## 2             1
##           gene_ID hgnc_symbol        chr strand  start    end
## 1 ENSG00000271254             KI270711.1      -   4612  29626
## 2 ENSG00000277196             KI270734.1      - 138082 161852
##   RFX4_CE_log2FoldChange RFX4_CE_pvalue RFX4_CE_padj RFX4_KO_log2FoldChange
## 1                -0.1020          0.570            1              -0.000999
## 2                -0.0515          0.855            1              -0.022300
##   RFX4_KO_pvalue RFX4_KO_padj RFX4_PTC_log2FoldChange RFX4_PTC_pvalue
## 1          0.996            1                  -0.115           0.521
## 2          0.937            1                   0.114           0.685
##   RFX4_PTC_padj
## 1             1
## 2             1
##                KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_7_FEZF2       FEZF2      1      3       0      NA            -0.296
## 2  CBO_day_7_OTX1        OTX1      2      1       4       1             0.606
## 3  CBO_day_7_PAX6        PAX6      0      0       0       1            -0.567
## 4  CBO_day_7_RFX4        RFX4      0      0       0       1            -1.290
##    KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 2.63e-06            -4.010        1             1.6100   CBO
## 2 1.00e+00             0.468        1            -0.0452   CBO
## 3 1.00e+00             1.440        1            -1.2700   CBO
## 4 1.00e+00            -0.839        1            -0.9720   CBO
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).

##           gene_ID hgnc_symbol        chr strand  start    end
## 1 ENSG00000271254             KI270711.1      -   4612  29626
## 2 ENSG00000277196             KI270734.1      - 138082 161852
##   FEZF2_CE_log2FoldChange FEZF2_CE_pvalue FEZF2_CE_padj FEZF2_KO_log2FoldChange
## 1                 -0.0599          0.5730         1.000                -0.00493
## 2                  0.3580          0.0526         0.996                 0.27000
##   FEZF2_KO_pvalue FEZF2_KO_padj FEZF2_PTC_log2FoldChange FEZF2_PTC_pvalue
## 1           0.967             1                  -0.0192            0.864
## 2           0.193             1                   0.1760            0.369
##   FEZF2_PTC_padj
## 1              1
## 2              1
##           gene_ID hgnc_symbol        chr strand  start    end
## 1 ENSG00000271254             KI270711.1      -   4612  29626
## 2 ENSG00000277196             KI270734.1      - 138082 161852
##   OTX1_CE_log2FoldChange OTX1_CE_pvalue OTX1_CE_padj OTX1_KO_log2FoldChange
## 1                  0.239          0.236        0.921                 0.0075
## 2                 -0.551          0.122        0.833                 0.0124
##   OTX1_KO_pvalue OTX1_KO_padj OTX1_PTC_log2FoldChange OTX1_PTC_pvalue
## 1          0.963            1                  -0.183           0.250
## 2          0.966            1                  -0.163           0.567
##   OTX1_PTC_padj
## 1             1
## 2             1
##           gene_ID hgnc_symbol        chr strand  start    end
## 1 ENSG00000271254             KI270711.1      -   4612  29626
## 2 ENSG00000277196             KI270734.1      - 138082 161852
##   RFX4_CE_log2FoldChange RFX4_CE_pvalue RFX4_CE_padj RFX4_KO_log2FoldChange
## 1                  0.138         0.3800            1                  0.281
## 2                 -0.487         0.0667            1                 -0.399
##   RFX4_KO_pvalue RFX4_KO_padj RFX4_PTC_log2FoldChange RFX4_PTC_pvalue
## 1         0.0724            1                   0.038          0.8080
## 2         0.1300            1                  -0.453          0.0852
##   RFX4_PTC_padj
## 1             1
## 2             1
##                KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_8_FEZF2       FEZF2      0      5       2   1.000            -0.341
## 2  CBO_day_8_OTX1        OTX1     10     37      13   0.283             1.280
## 3  CBO_day_8_RFX4        RFX4      8     12      11   1.000            -1.060
##    KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 1.75e-25            -3.710   0.0916              0.807   CBO
## 2 6.74e-01             0.714   1.0000              0.327   CBO
## 3 1.00e+00            -0.431   1.0000             -1.370   CBO

##           gene_ID hgnc_symbol        chr strand  start    end
## 1 ENSG00000271254             KI270711.1      -   4612  29626
## 2 ENSG00000277196             KI270734.1      - 138082 161852
##   OTX1_CE_log2FoldChange OTX1_CE_pvalue OTX1_CE_padj OTX1_KO_log2FoldChange
## 1                  0.171          0.285            1                  0.106
## 2                 -0.201          0.470            1                  0.236
##   OTX1_KO_pvalue OTX1_KO_padj OTX1_PTC_log2FoldChange OTX1_PTC_pvalue
## 1          0.533            1                  0.0774           0.630
## 2          0.420            1                 -0.1980           0.478
##   OTX1_PTC_padj
## 1             1
## 2             1
##           gene_ID hgnc_symbol        chr strand  start    end
## 1 ENSG00000271254             KI270711.1      -   4612  29626
## 2 ENSG00000277196             KI270734.1      - 138082 161852
##   RFX4_CE_log2FoldChange RFX4_CE_pvalue RFX4_CE_padj RFX4_KO_log2FoldChange
## 1                 0.2880         0.0774            1                -0.0742
## 2                -0.0357         0.8770            1                 0.3340
##   RFX4_KO_pvalue RFX4_KO_padj RFX4_PTC_log2FoldChange RFX4_PTC_pvalue
## 1          0.649            1                   0.212           0.200
## 2          0.144            1                   0.242           0.303
##   RFX4_PTC_padj
## 1             1
## 2             1
##               KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_9_OTX1        OTX1     37     40      44       1             0.613
## 2 CBO_day_9_RFX4        RFX4     37     56      33       1            -1.160
##   KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1   1.000             0.447        1              0.383   CBO
## 2   0.269            -2.670        1             -1.230   CBO

Generate valcano plots

Also output the DE gene sets for the gene set enrichment analysis.

In some (day group, knockout gene, knockout strategy) combinations, there are too many DE genes and they cannot all be labeled in the volcano plots.

df_file_info
##       DE_group  gene            items
## 1  CBO_day_3_4  LHX5 CBO_day_3_4_LHX5
## 2    CBO_day_5 FEZF2  CBO_day_5_FEZF2
## 3    CBO_day_5  LHX5   CBO_day_5_LHX5
## 4    CBO_day_6 FEZF2  CBO_day_6_FEZF2
## 5    CBO_day_6  LHX5   CBO_day_6_LHX5
## 6    CBO_day_6  RFX4   CBO_day_6_RFX4
## 7    CBO_day_7 FEZF2  CBO_day_7_FEZF2
## 8    CBO_day_7  OTX1   CBO_day_7_OTX1
## 9    CBO_day_7  PAX6   CBO_day_7_PAX6
## 10   CBO_day_7  RFX4   CBO_day_7_RFX4
## 11   CBO_day_8 FEZF2  CBO_day_8_FEZF2
## 12   CBO_day_8  OTX1   CBO_day_8_OTX1
## 13   CBO_day_8  RFX4   CBO_day_8_RFX4
## 14   CBO_day_9  OTX1   CBO_day_9_OTX1
## 15   CBO_day_9  RFX4   CBO_day_9_RFX4
fc0
## [1] 0.5849625
p0
## [1] 0.05
for(k in 1:nrow(df_file_info)){
  
  it_k = df_file_info$items[k]
  d_group = df_file_info$DE_group[k]
  gene_name = df_file_info$gene[k]
  
  # create a list to store DE gene set for the gene set enrichment analysis
  de_gene_sets = list() 
  
  print(it_k)
  
  res = fread(file.path(processed_output_dir,
                        paste0(release_name, "_", it_k, "_DEseq2.tsv")),
              data.table = FALSE)

  for (ko1 in c("CE", "KO", "PTC")){

    res_k = res[,c(1, grep(ko1, names(res)))]
    names(res_k) = gsub(paste0(gene_name, "_", ko1, "_"), "", names(res_k))

    ww_up = which((res_k$log2FoldChange > fc0) & (res_k$padj < p0))
    ww_down = which((res_k$log2FoldChange < ((-1)*fc0)) & (res_k$padj < p0))

    de_gene_sets[[paste0(gene_name, "_", ko1, "_up")]] = res_k[ww_up,]$gene_ID
    de_gene_sets[[paste0(gene_name, "_", ko1, "_down")]] = res_k[ww_down,]$gene_ID
    
    res_k$DE = "No"
    res_k$DE[ww_up] <- "Up"
    res_k$DE[ww_down] <- "Down"
    
    res_k$log2FoldChange[which(res_k$log2FoldChange > 3)] = 3
    res_k$log2FoldChange[which(res_k$log2FoldChange < -3)] = -3
  
    col2use = c("blue", "grey", "red")
    names(col2use) = c("Down", "No", "Up")
    
    res_k$delabel = NA
    res_k$gene_symbol = gene_info$gene_symbol[match(res_k$gene_ID, 
                                                    gene_info$ensembl_gene_id)]
    w_de = which(res_k$DE != "No")
    res_k$delabel[w_de] = res_k$gene_symbol[w_de]
  
    print(table(res_k$DE))
    
    g2 = ggplot(res_k, aes(x=log2FoldChange, y=-log10(padj), 
                      col=DE, label=delabel)) + 
           geom_point() + ggtitle(paste0(d_group, "\n", gene_name, "_", ko1)) + 
           geom_text_repel(point.padding = 0.5,
                           min.segment.length = 0, 
                           size = 3,  # Adjust text size
                           box.padding = 0.5,
                           max.overlaps = 20, 
                           show.legend = FALSE) + 
           scale_color_manual(values=col2use)
      
    print(g2)

  }
  
  # save out the DE gene lists for gene set enrichment analysis
  DE_set_output_dir = sprintf("%s/DE_gene_lists", result_dir)
                       
    
  if (!file.exists(DE_set_output_dir)){
    dir.create(DE_set_output_dir)
  }
  
  saveRDS(de_gene_sets, 
          sprintf("%s/%s_%s_DE_gene_list.rds", 
                  DE_set_output_dir, release_name, it_k))
  
}
## [1] "CBO_day_3_4_LHX5"
## 
##  Down    No    Up 
##     6 23232     4
## Warning: Removed 2138 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23232 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No 
##     7 23235
## Warning: Removed 1891 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23235 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##    10 23231     1
## Warning: Removed 2332 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23231 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_5_FEZF2"
## 
##  Down    No    Up 
##    54 23385   128
## Warning: Removed 10510 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23385 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 163 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##     2 23558     7
## Warning: Removed 14164 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23558 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##     4 23545    18
## Warning: Removed 12338 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23545 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "CBO_day_5_LHX5"
## 
##  Down    No 
##     2 23565
## Warning: Removed 5037 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23565 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##    13 23395   159
## Warning: Removed 12793 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23395 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 157 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No 
##     1 23566
## Warning: Removed 825 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23566 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_6_FEZF2"
## 
##  Down    No    Up 
##    15 23874    22
## Warning: Removed 9739 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23874 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##     1 23905     5
## Warning: Removed 2004 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23905 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##     1 23902     8
## Warning: Removed 1924 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23902 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_6_LHX5"
## 
##  Down    No    Up 
##    16 23839    56
## Warning: Removed 1732 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23839 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 42 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##    No    Up 
## 23910     1
## Warning: Removed 1658 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23910 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##    No    Up 
## 23908     3
## Warning: Removed 1694 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23908 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_6_RFX4"
## 
##  Down    No    Up 
##     1 23905     5
## Warning: Removed 1998 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23905 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##     2 23899    10
## Warning: Removed 1971 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23899 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##    No    Up 
## 23901    10
## Warning: Removed 1899 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23901 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_7_FEZF2"
## 
##  Down    No 
##     1 23664
## Warning: Removed 14223 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23664 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##     2 23662     1
## Warning: Removed 1712 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23662 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##    No 
## 23665
## Warning: Removed 1738 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_7_OTX1"
## 
##    No    Up 
## 23663     2
## Warning: Removed 1669 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23663 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##    No    Up 
## 23664     1
## Warning: Removed 1601 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23664 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##     1 23661     3
## Warning: Removed 1746 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23661 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_7_PAX6"
## 
##    No 
## 23665
## Warning: Removed 1971 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##    No 
## 23665
## Warning: Removed 1964 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##    No 
## 23665
## Warning: Removed 1894 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_7_RFX4"
## 
##    No 
## 23665
## Warning: Removed 1872 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##    No 
## 23665
## Warning: Removed 1924 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##    No 
## 23665
## Warning: Removed 1900 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_8_FEZF2"
## 
##    No 
## 23268
## Warning: Removed 2112 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23268 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##     2 23263     3
## Warning: Removed 2236 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23263 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##     1 23266     1
## Warning: Removed 2213 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23266 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_8_OTX1"
## 
##  Down    No    Up 
##     3 23258     7
## Warning: Removed 2052 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23258 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##     9 23231    28
## Warning: Removed 2292 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23231 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##     5 23255     8
## Warning: Removed 2349 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23255 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_8_RFX4"
## 
##  Down    No    Up 
##     2 23260     6
## Warning: Removed 2216 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23260 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##     3 23256     9
## Warning: Removed 2241 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23256 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## 
##  Down    No    Up 
##     2 23257     9
## Warning: Removed 2259 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23257 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

## [1] "CBO_day_9_OTX1"
## 
##  Down    No    Up 
##     9 22886    28
## Warning: Removed 702 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22886 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 20 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    13 22883    27
## Warning: Removed 840 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22883 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    13 22879    31
## Warning: Removed 747 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22879 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "CBO_day_9_RFX4"
## 
##  Down    No    Up 
##     8 22886    29
## Warning: Removed 813 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22886 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    15 22867    41
## Warning: Removed 732 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22867 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##     9 22890    24
## Warning: Removed 838 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22890 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  7855839 419.6   12594381 672.7         NA 12594381 672.7
## Vcells 22783465 173.9   57594827 439.5      65536 47929023 365.7
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] readxl_1.4.3                UpSetR_1.4.0               
##  [3] ComplexHeatmap_2.14.0       data.table_1.15.4          
##  [5] dplyr_1.1.4                 ggpointdensity_0.1.0       
##  [7] viridis_0.6.5               viridisLite_0.4.2          
##  [9] DESeq2_1.38.3               SummarizedExperiment_1.28.0
## [11] Biobase_2.58.0              MatrixGenerics_1.10.0      
## [13] matrixStats_1.3.0           GenomicRanges_1.50.2       
## [15] GenomeInfoDb_1.34.9         IRanges_2.32.0             
## [17] S4Vectors_0.36.2            BiocGenerics_0.44.0        
## [19] RColorBrewer_1.1-3          MASS_7.3-60                
## [21] stringr_1.5.1               ggpubr_0.6.0               
## [23] ggrepel_0.9.5               ggplot2_3.5.1              
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-8           bit64_4.0.5            doParallel_1.0.17     
##  [4] httr_1.4.7             tools_4.2.3            backports_1.5.0       
##  [7] bslib_0.8.0            utf8_1.2.4             R6_2.5.1              
## [10] DBI_1.2.3              colorspace_2.1-1       GetoptLong_1.0.5      
## [13] withr_3.0.1            tidyselect_1.2.1       gridExtra_2.3         
## [16] bit_4.0.5              compiler_4.2.3         cli_3.6.3             
## [19] DelayedArray_0.24.0    labeling_0.4.3         sass_0.4.9            
## [22] scales_1.3.0           digest_0.6.37          rmarkdown_2.28        
## [25] XVector_0.38.0         pkgconfig_2.0.3        htmltools_0.5.8.1     
## [28] highr_0.11             fastmap_1.2.0          GlobalOptions_0.1.2   
## [31] rlang_1.1.4            rstudioapi_0.16.0      RSQLite_2.3.7         
## [34] farver_2.1.2           shape_1.4.6.1          jquerylib_0.1.4       
## [37] generics_0.1.3         jsonlite_1.8.8         BiocParallel_1.32.6   
## [40] car_3.1-2              RCurl_1.98-1.16        magrittr_2.0.3        
## [43] GenomeInfoDbData_1.2.9 Matrix_1.5-4.1         Rcpp_1.0.13           
## [46] munsell_0.5.1          fansi_1.0.6            abind_1.4-5           
## [49] lifecycle_1.0.4        stringi_1.8.4          yaml_2.3.10           
## [52] carData_3.0-5          zlibbioc_1.44.0        plyr_1.8.9            
## [55] blob_1.2.4             parallel_4.2.3         crayon_1.5.3          
## [58] lattice_0.22-6         cowplot_1.1.3          Biostrings_2.66.0     
## [61] annotate_1.76.0        circlize_0.4.16        KEGGREST_1.38.0       
## [64] locfit_1.5-9.10        knitr_1.48             pillar_1.9.0          
## [67] rjson_0.2.21           ggsignif_0.6.4         geneplotter_1.76.0    
## [70] codetools_0.2-20       XML_3.99-0.17          glue_1.7.0            
## [73] evaluate_0.24.0        foreach_1.5.2          vctrs_0.6.5           
## [76] png_0.1-8              cellranger_1.1.0       gtable_0.3.5          
## [79] purrr_1.0.2            tidyr_1.3.1            clue_0.3-65           
## [82] cachem_1.1.0           xfun_0.47              xtable_1.8-4          
## [85] broom_1.0.6            rstatix_0.7.2          tibble_3.2.1          
## [88] iterators_1.0.14       AnnotationDbi_1.60.2   memoise_2.0.1         
## [91] cluster_2.1.6